rm(list=ls(all=TRUE))

################################################################################
###### look at differences in ideology among house members
################################################################################

daH <- fread("Data/Hall_votes.csv" , stringsAsFactors = FALSE)
daH <- subset(daH , congress > 34 & congress < 93 & chamber=="House")
daH$roll_call <- paste0(daH$congress , "_", daH$rollnumber)

daH_temp <- read.csv("Data/Hall_members.csv" , stringsAsFactors = FALSE)
head(daH_temp)

daH <- join(daH , subset(daH_temp , select = c("congress","icpsr","state_abbrev","party_code","bioname")) , by = c("congress","icpsr"))
daH <- subset(daH , state_abbrev!="USA") %>%
  mutate(. , bioname = toupper(bioname))

### cast codes are 1 (yea; 2 paired 3 announced) 6 (nay; 4 announced 5 paired) 7/8 present and 9 abstain
### i'm trying to match exactly the procedure used in the original for constructing deviation
daH$yea <- NA
daH$yea[daH$cast_code%in%c(1,2,3)] <- 1
daH$yea[daH$cast_code%in%c(4,5,6)] <- 0
### original paper considers 1,2,3 as yeas
daH$not_voting <- as.numeric(daH$cast_code==9)

daH$r_party <- "other"
daH$r_party[daH$party_code==100] <- "Democrat"
daH$r_party[daH$party_code==200] <- "Republican"
table(daH$party_code)

daH <- as.data.table(daH)
daH <- daH[, `:=` (dem_yes = sum(na.omit(yea[r_party=="Democrat"])),
                   dem_voters = length(na.omit(yea[r_party=="Democrat"])),
                   rep_yes = sum(na.omit(yea[r_party=="Republican"])),
                   rep_voters = length(na.omit(yea[r_party=="Republican"])),
                   other_yes = sum(na.omit(yea[!r_party%in%c("Democrat","Republican")])),
                   other_voters = length(na.omit(yea[!r_party%in%c("Democrat","Republican")]))
) , by = roll_call ]

daH$party_vote <- 0
daH$party_vote[daH$r_party=="Democrat" & (daH$dem_yes - daH$yea) > (daH$dem_voters - daH$dem_yes - (1- daH$yea))] <- 1
daH$party_vote[daH$r_party=="Republican" & (daH$rep_yes - daH$yea) > (daH$rep_voters - daH$rep_yes - (1- daH$yea))] <- 1
daH$party_vote[daH$r_party=="Democrat" & (daH$dem_yes - daH$yea) == (daH$dem_voters - daH$dem_yes - (1- daH$yea))] <- NA
daH$party_vote[daH$r_party=="Republican" & (daH$rep_yes - daH$yea) == (daH$rep_voters - daH$rep_yes - (1- daH$yea))] <- NA


daH$deviate <- as.numeric(daH$yea!=daH$party_vote)
daH$deviate[is.na(daH$yea)] <- NA
daH$deviate[daH$cast_code%in%c(7,8)] <- 1

daH <- daH[order(daH$roll_call , daH$bioname),]
daH <- daH[, `:=` (rank_p_overall = (seq_along(bioname)-1)/(length(bioname)-1)) , by = roll_call ]

### add vote margin
daH <- mutate(daH , yea_perc = (dem_yes + rep_yes + other_yes)/(dem_voters + rep_voters + other_voters)) %>% 
  mutate(. , close = as.numeric(yea_perc >= 0.35 & yea_perc <= 0.65))

daH2 <- subset(daH , !cast_code%in%c(9)) %>% as.data.table
daH2 <- daH2[order(daH2$roll_call , daH2$bioname),]
daH2 <- daH2[, `:=` (rank_p = (seq_along(bioname)-1)/(length(bioname)-1)) , by = roll_call ]



feols(deviate ~ rank_p  | icpsr, data = subset(daH2 , !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress")

feols(deviate ~ rank_p  | interaction(icpsr,congress), data = subset(daH2 , !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress")

feols(deviate ~ rank_p_overall  | icpsr, data = subset(daH2 , !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress")

feols(deviate ~ rank_p_overall  | interaction(icpsr,congress), data = subset(daH2 , !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress")

ddply(daH2 , .(icpsr) , function(x) sd(x$rank_p_overall)) %>% with(. , na.omit(V1)) %>% mean
mean(na.omit(daH2$deviate))

ddply(da , .(icpsr) , function(x) sd(x$rank_p_overall)) %>% with(. , na.omit(V1)) %>% mean
subset(da2 , congress < 93) %>% 
  with(. , na.omit(deviate)) %>% mean


#####################################################################
#############    Randomization inference - table 6   ##############
#####################################################################
### 
nsims <- 10000
output1rc <- output1 <- data.frame(matrix(NA , nrow = nsims , ncol = 24))

### have to use full data to permute names, to directly replicate original analysis
names.unique <- unique(daH$bioname)

set.seed(12345)
for(i in 1:nsims){
  temp <- data.frame(bioname = names.unique,
                     name2 = sample(names.unique , length(names.unique) , replace = FALSE)) %>% as.data.table()
  da_sim <- join(daH , temp , by = "bioname")
  
  for(k in 1:2){
    if(k==1){
      da_sim <- da_sim[order(da_sim$roll_call , da_sim$name2),]
      da_sim2 <- da_sim[, `:=` (rank_p_sim = (seq_along(name2)-1)/(length(name2)-1)) , by = roll_call ]
    }
    if(k==2){
      da_sim2 <- subset(da_sim , !cast_code%in%c(9))
      da_sim2 <- da_sim2[order(da_sim2$roll_call , da_sim2$name2),]
      da_sim2 <- da_sim2[, `:=` (rank_p_sim = (seq_along(name2)-1)/(length(name2)-1)) , by = roll_call ]
    }
    
    da_sim3 <- subset(da_sim2 , !cast_code%in%c(2,3,4,5,9) & r_party!="other" & !is.na(deviate))
    
    tttemp <- matrix(NA , nrow = 1 , ncol = 24)
    ttemp <- feols(deviate ~ rank_p_sim  | name2, data = da_sim3)
    ttemp1 <- summary(ttemp , cluster = "congress") %>% coeftable(.)
    tttemp[1,1:4] <- unlist(ttemp1)
    
    ttemp <- feols(deviate ~ rank_p_sim  | interaction(name2 , congress), data = da_sim3)
    ttemp1 <- summary(ttemp , cluster = "congress") %>% coeftable(.)
    tttemp[1,5:8] <- unlist(ttemp1)
    
    ttemp <- feols(deviate ~ rank_p_sim  | name2, data = subset(da_sim3 , close==1))
    ttemp1 <- summary(ttemp , cluster = "congress") %>% coeftable(.)
    tttemp[1,9:12] <- unlist(ttemp1)
    
    ttemp <- feols(deviate ~ rank_p_sim  | interaction(name2 , congress), data = subset(da_sim3 , close==1))
    ttemp1 <- summary(ttemp , cluster = "congress") %>% coeftable(.)
    tttemp[1,13:16] <- unlist(ttemp1)
    
    ttemp <- feols(deviate ~ rank_p_sim  | name2, data = subset(da_sim3 , close==0))
    ttemp1 <- summary(ttemp , cluster = "congress") %>% coeftable(.)
    tttemp[1,17:20] <- unlist(ttemp1)
    
    ttemp <- feols(deviate ~ rank_p_sim  | interaction(name2 , congress), data = subset(da_sim3 , close==0))
    ttemp1 <- summary(ttemp , cluster = "congress") %>% coeftable(.)
    tttemp[1,21:24] <- unlist(ttemp1)
    
    if(k==1) output1[i,] <- tttemp
    if(k==2) output1rc[i,] <- tttemp
    }  
  cat("\r", i, "of", nsims) ### How far loop has progressed
}


save(daH, daH2 , output1,output1rc,
     file = "Data/Temp Files/BIW_house_RI_sims.RData")

################################################################################
####################### LOAD DATA AND START FROM HERE ##########################
################################################################################
load(file = "Data/Temp Files/BIW_house_RI_sims.RData")

sd(output1[,5])
sd(output1rc[,5])

### Table 2 (US House only)
table2H <- matrix(NA , nrow = 2 , ncol = 4)

### reproduce CDFs from simulations. Ranking Among all Senators First
### original data


estimate <- feols(deviate ~ rank_p  | icpsr, data = subset(daH2 , !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress") %>% coeftable()
table2H[1,1] <- estimate[1,1]
table2H[2,1] <- mean(abs(output1rc[,1]) >= abs(estimate[1,1]))



estimate <- feols(deviate ~ rank_p  | interaction(icpsr,congress), data = subset(daH2 , !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress") %>% coeftable()
table2H[1,2] <- estimate[1,1]
table2H[2,2] <- mean(abs(output1rc[,5]) >= abs(estimate[1,1]))


estimate <- feols(deviate ~ rank_p_overall  | icpsr, data = subset(daH2 , !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress") %>% coeftable()
table2H[1,3] <- estimate[1,1]
table2H[2,3] <- mean(abs(output1[,1]) >= abs(estimate[1,1]))


estimate <- feols(deviate ~ rank_p_overall  | interaction(icpsr,congress), data = subset(daH2 , !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress") %>% coeftable()
table2H[1,4] <- estimate[1,1]
table2H[2,4] <- mean(abs(output1[,5]) >= abs(estimate[1,1]))


sink("Generated Files/Replicated Table 2 House.txt")
table2H %>% round(. , 3)
sink(file = NULL)


### Table 3 (US House only)
### close and lopsided us house votes


mean(subset(daH2 , !cast_code%in%c(2,3,4,5,9) & r_party!="other")$close)

table3H <- matrix(NA , nrow = 4 , ncol = 4)
# close
estimate <- feols(deviate ~ rank_p  | icpsr, data = subset(daH2 , close==1 & !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress") %>% coeftable()
table3H[1,1] <- estimate[1,c(1)]
table3H[2,1] <- mean(abs(output1rc[,9]) >= abs(estimate[1,1]))


estimate <- feols(deviate ~ rank_p  | interaction(icpsr,congress), data = subset(daH2 , close==1 & !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress") %>% coeftable()
table3H[1,2] <- estimate[1,1]
table3H[2,2] <- mean(abs(output1rc[,13]) >= abs(estimate[1,1]))


estimate <- feols(deviate ~ rank_p_overall  | icpsr, data = subset(daH2 , close==1 & !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress") %>% coeftable()
table3H[1,3] <- estimate[1,1]
table3H[2,3] <- mean(abs(output1[,9]) >= abs(estimate[1,1]))


estimate <- feols(deviate ~ rank_p_overall  | interaction(icpsr,congress), data = subset(daH2 , close==1 & !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress") %>% coeftable()
table3H[1,4] <- estimate[1,1]
table3H[2,4] <- mean(abs(output1[,13]) >= abs(estimate[1,1]))

# lopsided
estimate <- feols(deviate ~ rank_p  | icpsr, data = subset(daH2 , close==0& !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress") %>% coeftable()
table3H[3,1] <- estimate[1,c(1)]
table3H[4,1] <- mean(abs(output1rc[,17]) >= abs(estimate[1,1]))


estimate <- feols(deviate ~ rank_p  | interaction(icpsr,congress), data = subset(daH2 , close==0 & !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress") %>% coeftable()
table3H[3,2] <- estimate[1,1]
table3H[4,2] <- mean(abs(output1rc[,21]) >= abs(estimate[1,1]))


estimate <- feols(deviate ~ rank_p_overall  | icpsr, data = subset(daH2 , close==0 & !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress") %>% coeftable()
table3H[3,3] <- estimate[1,1]
table3H[4,3] <- mean(abs(output1[,17]) >= abs(estimate[1,1]))


estimate <- feols(deviate ~ rank_p_overall  | interaction(icpsr,congress), data = subset(daH2 , close==0 & !cast_code%in%c(2,3,4,5,9) & r_party!="other")) %>%
  summary(. , cluster = "congress") %>% coeftable()
table3H[3,4] <- estimate[1,1]
table3H[4,4] <- mean(abs(output1[,21]) >= abs(estimate[1,1]))

sink("Generated Files/Replicated Table 3 House.txt")
table3H %>% round(. , 3)
sink(file = NULL)



